import json
import pandas as pd

def _scaled_value(record, year, divisor=1e4):
    """Return ``record[str(year)] / divisor``, or NaN when the value is absent or null."""
    value = record.get(str(year))
    if value is None:
        # JSON ``null`` (or a missing year key) marks a missing figure; keep it
        # as NaN so it can be forward-filled later instead of raising TypeError.
        return float("nan")
    return value / divisor


def load_and_clean_data(filepath, years=range(2020, 2025)):
    """Read a JSON file of financial statements and extract key metrics.

    The file is expected to map each company name to a dict containing
    ``"income_statement"`` and ``"balance_sheets"`` lists. Each list item is
    a record with an ``"index"`` key (the metric name) plus one key per year
    (as a string) holding that year's figure.

    Every figure is divided by 1e4. The original docstring states the result
    is in units of 亿元, which presumably means the input is in 万元 — confirm
    against the data source.

    Args:
        filepath: Path to the UTF-8 encoded JSON file.
        years: Years to extract, in ascending order (forward-fill follows
            this order). Defaults to 2020 through 2024.

    Returns:
        A ``pandas.DataFrame`` with one row per (company, year) and the
        columns 公司, 年份, 营业收入, 营业成本, 净利润, 总资产, 所有者权益, 总负债.
        Missing figures are forward-filled from the same company's earlier
        years; a gap with no earlier year for that company stays NaN.

    Raises:
        KeyError: If a company lacks a required statement or metric row.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # (output column, metric name in the source statement)
    income_fields = (
        ("营业收入", "营业总收入"),
        ("营业成本", "营业总成本"),
        ("净利润", "归属母公司净利润"),
    )
    balance_fields = (
        ("总资产", "总资产"),
        ("所有者权益", "所有者权益"),
        ("总负债", "总负债"),
    )
    value_columns = [column for column, _ in income_fields + balance_fields]
    columns = ["公司", "年份"] + value_columns

    results = []
    for company, reports in data.items():
        income = {item["index"]: item for item in reports["income_statement"]}
        balance = {item["index"]: item for item in reports["balance_sheets"]}

        for year in years:
            entry = {"公司": company, "年份": year}
            for column, metric in income_fields:
                entry[column] = _scaled_value(income[metric], year)
            for column, metric in balance_fields:
                entry[column] = _scaled_value(balance[metric], year)
            results.append(entry)

    # Explicit columns keep the schema stable even when the input is empty.
    df = pd.DataFrame(results, columns=columns)

    # Handle missing values: forward-fill within each company only, so one
    # company's figures never leak into the next company's leading gaps.
    if not df.empty:
        df[value_columns] = df.groupby("公司", sort=False)[value_columns].ffill()

    return df.drop_duplicates()
